General results:

summarised_df <- results_data_frame |> 
  group_by(data_generation, data_fitted) |> 
  summarise(mean_point              = mean(point, na.rm = TRUE),
            mean_ci_length_norm     = mean(conf_int_normal_upper - conf_int_normal_lower, na.rm = TRUE),
            coverage_ci_norm        = mean((conf_int_normal_lower < 1000) & (1000 < conf_int_normal_upper), na.rm = TRUE),
            mean_ci_length_log_norm = mean(conf_int_log_normal_upper - conf_int_log_normal_lower, na.rm = TRUE),
            coverage_ci_log_norm    = mean((conf_int_log_normal_lower < 1000) & (1000 < conf_int_log_normal_upper), na.rm = TRUE),
            succesful_fits          = mean(!is.na(point)))
## `summarise()` has grouped output by 'data_generation'. You can override using
## the `.groups` argument.
print(summarised_df, n=20)
## # A tibble: 80 × 8
## # Groups:   data_generation [12]
##    data_generation data_fitted mean_point mean_ci_length_norm coverage_ci_norm
##    <chr>           <chr>            <dbl>               <dbl>            <dbl>
##  1 Hurdleztgeom    correct          1008.               168.             0.938
##  2 Hurdleztgeom    negbin           1003.               340.             0.927
##  3 Hurdleztgeom    oizt              873.                69.7            0    
##  4 Hurdleztgeom    poisson           700.                24.1            0    
##  5 Hurdleztgeom    wrong_link       1008.               168.             0.938
##  6 Hurdleztgeom    ztHurdle         1094.               220.             0.654
##  7 Hurdleztgeom    ztoi              874.                78.9            0    
##  8 Hurdleztnegbin  correct           999.               236.             0.914
##  9 Hurdleztnegbin  geom             1188.               181.             0.004
## 10 Hurdleztnegbin  oizt              990.               181.             0.864
## 11 Hurdleztnegbin  poisson           816.                22.2            0    
## 12 Hurdleztnegbin  wrong_link        999.               237.             0.918
## 13 Hurdleztnegbin  ztHurdle     23285419.          56970543.             0.934
## 14 Hurdleztnegbin  ztoi            64086.             34427.             0.908
## 15 Hurdleztpoisson correct          1002.               104.             0.964
## 16 Hurdleztpoisson geometric        2279.               820.             0    
## 17 Hurdleztpoisson oizt              908.                42.5            0    
## 18 Hurdleztpoisson wrong_link       1003.               103.             0.962
## 19 Hurdleztpoisson ztHurdle         1057.               140.             0.687
## 20 Hurdleztpoisson ztoi              908.                42.5            0    
## # ℹ 60 more rows
## # ℹ 3 more variables: mean_ci_length_log_norm <dbl>,
## #   coverage_ci_log_norm <dbl>, succesful_fits <dbl>
pp <- summarised_df |>
  subset(succesful_fits < 1) |>
  as.data.frame() |> 
  mutate(data_generation = ordered(data_generation)) |>
  ggplot(aes(y = succesful_fits, x = data_fitted)) +
  geom_point() +
  facet_wrap(~data_generation, scales = c("free_x"), ncol = 3) + 
  ylab("Fitted proportion") +
  xlab("Model fitted") +
  ggtitle("Proportion of succesfully fitted models by true distribution of counts")

pp

Visualising outliers (i.e. when estimated regression parameters tend to boundary):

outliers <- results_data_frame |>
  subset(!is.na(point)) |>
  subset(point > 5000) |> 
  group_by(data_generation, data_fitted) |>
  summarise(n = n()) |>
  ggplot(aes(x = data_fitted, weight = n)) +
  geom_bar() +
  facet_wrap(~ data_generation, scales = c("free_x")) + 
  ylab("Number of outliers") +
  xlab("Model fitted") +
  ggtitle("Exreme outliers (estimate > 5 * true size) by true distribution of counts")
## `summarise()` has grouped output by 'data_generation'. You can override using
## the `.groups` argument.
outliers

Point estimates

Results for counts generated by ztoipoisson:

p1 <- results_data_frame |>
  subset(!is.na(point) & (data_generation == "ztoipoisson")) |>
  subset(point < 5000) |>
  ggplot(aes(x = data_fitted, y = point)) +
  geom_jitter(alpha = 0.05, shape = 1) + 
  geom_violin(alpha = 0.8, draw_quantiles = 1:9 / 10, scale = "width") +
  stat_summary(fun = function(x) mean(x, na.rm = TRUE), geom = "point") + 
  geom_hline(yintercept = 1000, linetype="dashed", color = "red") +
  ylab("Point estimate") +
  xlab("Model fitted")

p1

Summary statistics after excluding outliers:

results_data_frame |>
  subset(!is.na(point) & (data_generation == "ztoipoisson")) |>
  subset(point < 5000) |>
  group_by(data_fitted) |>
  summarise(bias = mean(point - 1000),
            rel_bias = mean(point - 1000) / 1000,
            MSE = mean((point - 1000)^2),
            MAE = mean(abs(point - 1000)),
            coverage_normal = mean((conf_int_normal_lower < 1000) & (conf_int_normal_upper > 1000)),
            coverage_log_normal = mean((conf_int_log_normal_lower < 1000) & (conf_int_log_normal_upper > 1000)))

Results for counts generated by oiztpoisson:

p2 <- results_data_frame |>
  subset(!is.na(point) & (data_generation == "oiztpoisson")) |>
  subset(point < 5000) |>
  ggplot(aes(x = data_fitted, y = point)) +
  geom_jitter(alpha = 0.05, shape = 1) + 
  geom_violin(alpha = 0.8, draw_quantiles = 1:9 / 10, scale = "width") +
  stat_summary(fun = function(x) mean(x, na.rm = TRUE), geom = "point") + 
  geom_hline(yintercept = 1000, linetype="dashed", color = "red") +
  ylab("Point estimate") +
  xlab("Model fitted")

p2

Summary statistics after excluding outliers:

results_data_frame |>
  subset(!is.na(point) & (data_generation == "oiztpoisson")) |>
  subset(point < 5000) |>
  group_by(data_fitted) |>
  summarise(bias = mean(point - 1000),
            rel_bias = mean(point - 1000) / 1000,
            MSE = mean((point - 1000)^2),
            MAE = mean(abs(point - 1000)),
            coverage_normal = mean((conf_int_normal_lower < 1000) & (conf_int_normal_upper > 1000)),
            coverage_log_normal = mean((conf_int_log_normal_lower < 1000) & (conf_int_log_normal_upper > 1000)))

Results for counts generated by ztHurdlepoisson:

p3 <- results_data_frame |>
  subset(!is.na(point) & (data_generation == "ztHurdlepoisson")) |>
  subset(point < 5000) |>
  ggplot(aes(x = data_fitted, y = point)) +
  geom_jitter(alpha = 0.05, shape = 1) + 
  geom_violin(alpha = 0.8, draw_quantiles = 1:9 / 10, scale = "width") +
  stat_summary(fun = function(x) mean(x, na.rm = TRUE), geom = "point") + 
  geom_hline(yintercept = 1000, linetype="dashed", color = "red") +
  ylab("Point estimate") +
  xlab("Model fitted")

p3

Summary statistics after excluding outliers:

results_data_frame |>
  subset(!is.na(point) & (data_generation == "ztHurdlepoisson")) |>
  subset(point < 5000) |>
  group_by(data_fitted) |>
  summarise(bias = mean(point - 1000),
            rel_bias = mean(point - 1000) / 1000,
            MSE = mean((point - 1000)^2),
            MAE = mean(abs(point - 1000)),
            coverage_normal = mean((conf_int_normal_lower < 1000) & (conf_int_normal_upper > 1000)),
            coverage_log_normal = mean((conf_int_log_normal_lower < 1000) & (conf_int_log_normal_upper > 1000)))

Results for counts generated by hurdleztpoisson:

p4 <- results_data_frame |>
  subset(!is.na(point) & (data_generation == "Hurdleztpoisson")) |>
  subset(point < 5000) |>
  ggplot(aes(x = data_fitted, y = point)) +
  geom_jitter(alpha = 0.05, shape = 1) + 
  geom_violin(alpha = 0.8, draw_quantiles = 1:9 / 10, scale = "width") +
  stat_summary(fun = function(x) mean(x, na.rm = TRUE), geom = "point") + 
  geom_hline(yintercept = 1000, linetype="dashed", color = "red") +
  ylab("Point estimate") +
  xlab("Model fitted")

p4

Summary statistics after excluding outliers:

results_data_frame |>
  subset(!is.na(point) & (data_generation == "Hurdleztpoisson")) |>
  subset(point < 5000) |>
  group_by(data_fitted) |>
  summarise(bias = mean(point - 1000),
            rel_bias = mean(point - 1000) / 1000,
            MSE = mean((point - 1000)^2),
            MAE = mean(abs(point - 1000)),
            coverage_normal = mean((conf_int_normal_lower < 1000) & (conf_int_normal_upper > 1000)),
            coverage_log_normal = mean((conf_int_log_normal_lower < 1000) & (conf_int_log_normal_upper > 1000)))

Results for counts generated by ztoigeom:

p5 <- results_data_frame |>
  subset(!is.na(point) & (data_generation == "ztoigeom")) |>
  subset(point < 5000) |>
  ggplot(aes(x = data_fitted, y = point)) +
  geom_jitter(alpha = 0.05, shape = 1) + 
  geom_violin(alpha = 0.8, draw_quantiles = 1:9 / 10, scale = "width") +
  stat_summary(fun = function(x) mean(x, na.rm = TRUE), geom = "point") + 
  geom_hline(yintercept = 1000, linetype="dashed", color = "red") +
  ylab("Point estimate") +
  xlab("Model fitted")

p5

Summary statistics after excluding outliers:

results_data_frame |>
  subset(!is.na(point) & (data_generation == "ztoigeom")) |>
  subset(point < 5000) |>
  group_by(data_fitted) |>
  summarise(bias = mean(point - 1000),
            rel_bias = mean(point - 1000) / 1000,
            MSE = mean((point - 1000)^2),
            MAE = mean(abs(point - 1000)),
            coverage_normal = mean((conf_int_normal_lower < 1000) & (conf_int_normal_upper > 1000)),
            coverage_log_normal = mean((conf_int_log_normal_lower < 1000) & (conf_int_log_normal_upper > 1000)))

Results for counts generated by oiztgeom:

p6 <- results_data_frame |>
  subset(!is.na(point) & (data_generation == "oiztgeom")) |>
  subset(point < 5000) |>
  ggplot(aes(x = data_fitted, y = point)) +
  geom_jitter(alpha = 0.05, shape = 1) + 
  geom_violin(alpha = 0.8, draw_quantiles = 1:9 / 10, scale = "width") +
  stat_summary(fun = function(x) mean(x, na.rm = TRUE), geom = "point") + 
  geom_hline(yintercept = 1000, linetype="dashed", color = "red") +
  ylab("Point estimate") +
  xlab("Model fitted")

p6

Summary statistics after excluding outliers:

results_data_frame |>
  subset(!is.na(point) & (data_generation == "oiztgeom")) |>
  subset(point < 5000) |>
  group_by(data_fitted) |>
  summarise(bias = mean(point - 1000),
            rel_bias = mean(point - 1000) / 1000,
            MSE = mean((point - 1000)^2),
            MAE = mean(abs(point - 1000)),
            coverage_normal = mean((conf_int_normal_lower < 1000) & (conf_int_normal_upper > 1000)),
            coverage_log_normal = mean((conf_int_log_normal_lower < 1000) & (conf_int_log_normal_upper > 1000)))

Results for counts generated by ztHurdlegeom:

p7 <- results_data_frame |>
  subset(!is.na(point) & (data_generation == "ztHurdlegeom")) |>
  subset(point < 5000) |>
  ggplot(aes(x = data_fitted, y = point)) +
  geom_jitter(alpha = 0.05, shape = 1) + 
  geom_violin(alpha = 0.8, draw_quantiles = 1:9 / 10, scale = "width") +
  stat_summary(fun = function(x) mean(x, na.rm = TRUE), geom = "point") + 
  geom_hline(yintercept = 1000, linetype="dashed", color = "red") +
  ylab("Point estimate") +
  xlab("Model fitted")

p7

Summary statistics after excluding outliers:

results_data_frame |>
  subset(!is.na(point) & (data_generation == "ztHurdlegeom")) |>
  subset(point < 5000) |>
  group_by(data_fitted) |>
  summarise(bias = mean(point - 1000),
            rel_bias = mean(point - 1000) / 1000,
            MSE = mean((point - 1000)^2),
            MAE = mean(abs(point - 1000)),
            coverage_normal = mean((conf_int_normal_lower < 1000) & (conf_int_normal_upper > 1000)),
            coverage_log_normal = mean((conf_int_log_normal_lower < 1000) & (conf_int_log_normal_upper > 1000)))

Results for counts generated by hurdleztgeom:

p8 <- results_data_frame |>
  subset(!is.na(point) & (data_generation == "Hurdleztgeom")) |>
  subset(point < 5000) |>
  ggplot(aes(x = data_fitted, y = point)) +
  geom_jitter(alpha = 0.05, shape = 1) + 
  geom_violin(alpha = 0.8, draw_quantiles = 1:9 / 10, scale = "width") +
  stat_summary(fun = function(x) mean(x, na.rm = TRUE), geom = "point") + 
  geom_hline(yintercept = 1000, linetype="dashed", color = "red") +
  ylab("Point estimate") +
  xlab("Model fitted")

p8

Summary statistics after excluding outliers:

results_data_frame |>
  subset(!is.na(point) & (data_generation == "Hurdleztgeom")) |>
  subset(point < 5000) |>
  group_by(data_fitted) |>
  summarise(bias = mean(point - 1000),
            rel_bias = mean(point - 1000) / 1000,
            MSE = mean((point - 1000)^2),
            MAE = mean(abs(point - 1000)),
            coverage_normal = mean((conf_int_normal_lower < 1000) & (conf_int_normal_upper > 1000)),
            coverage_log_normal = mean((conf_int_log_normal_lower < 1000) & (conf_int_log_normal_upper > 1000)))

Results for counts generated by ztoinegbin:

p9 <- results_data_frame |>
  subset(!is.na(point) & (data_generation == "ztoinegbin")) |>
  subset(point < 5000) |>
  ggplot(aes(x = data_fitted, y = point)) +
  geom_jitter(alpha = 0.05, shape = 1) + 
  geom_violin(alpha = 0.8, draw_quantiles = 1:9 / 10, scale = "width") +
  stat_summary(fun = function(x) mean(x, na.rm = TRUE), geom = "point") + 
  geom_hline(yintercept = 1000, linetype="dashed", color = "red") +
  ylab("Point estimate") +
  xlab("Model fitted")

p9

Summary statistics after excluding outliers:

results_data_frame |>
  subset(!is.na(point) & (data_generation == "ztoinegbin")) |>
  subset(point < 5000) |>
  group_by(data_fitted) |>
  summarise(bias = mean(point - 1000),
            rel_bias = mean(point - 1000) / 1000,
            MSE = mean((point - 1000)^2),
            MAE = mean(abs(point - 1000)),
            coverage_normal = mean((conf_int_normal_lower < 1000) & (conf_int_normal_upper > 1000)),
            coverage_log_normal = mean((conf_int_log_normal_lower < 1000) & (conf_int_log_normal_upper > 1000)))

Results for counts generated by oiztnegbin:

p10 <- results_data_frame |>
  subset(!is.na(point) & (data_generation == "oiztnegbin")) |>
  subset(point < 5000) |>
  ggplot(aes(x = data_fitted, y = point)) +
  geom_jitter(alpha = 0.05, shape = 1) + 
  geom_violin(alpha = 0.8, draw_quantiles = 1:9 / 10, scale = "width") +
  stat_summary(fun = function(x) mean(x, na.rm = TRUE), geom = "point") + 
  geom_hline(yintercept = 1000, linetype="dashed", color = "red") +
  ylab("Point estimate") +
  xlab("Model fitted")

p10

Summary statistics after excluding outliers:

results_data_frame |>
  subset(!is.na(point) & (data_generation == "oiztnegbin")) |>
  subset(point < 5000) |>
  group_by(data_fitted) |>
  summarise(bias = mean(point - 1000),
            rel_bias = mean(point - 1000) / 1000,
            MSE = mean((point - 1000)^2),
            MAE = mean(abs(point - 1000)),
            coverage_normal = mean((conf_int_normal_lower < 1000) & (conf_int_normal_upper > 1000)),
            coverage_log_normal = mean((conf_int_log_normal_lower < 1000) & (conf_int_log_normal_upper > 1000)))

Results for counts generated by ztHurdlenegbin:

p11 <- results_data_frame |>
  subset(!is.na(point) & (data_generation == "ztHurdlenegbin")) |>
  subset(point < 5000) |>
  ggplot(aes(x = data_fitted, y = point)) +
  geom_jitter(alpha = 0.05, shape = 1) + 
  geom_violin(alpha = 0.8, draw_quantiles = 1:9 / 10, scale = "width") +
  stat_summary(fun = function(x) mean(x, na.rm = TRUE), geom = "point") + 
  geom_hline(yintercept = 1000, linetype="dashed", color = "red") +
  ylab("Point estimate") +
  xlab("Model fitted")

p11

Summary statistics after excluding outliers:

results_data_frame |>
  subset(!is.na(point) & (data_generation == "ztHurdlenegbin")) |>
  subset(point < 5000) |>
  group_by(data_fitted) |>
  summarise(bias = mean(point - 1000),
            rel_bias = mean(point - 1000) / 1000,
            MSE = mean((point - 1000)^2),
            MAE = mean(abs(point - 1000)),
            coverage_normal = mean((conf_int_normal_lower < 1000) & (conf_int_normal_upper > 1000)),
            coverage_log_normal = mean((conf_int_log_normal_lower < 1000) & (conf_int_log_normal_upper > 1000)))

Results for counts generated by hurdleztnegbin:

p12 <- results_data_frame |>
  subset(!is.na(point) & (data_generation == "Hurdleztnegbin")) |>
  subset(point < 5000) |>
  ggplot(aes(x = data_fitted, y = point)) +
  geom_jitter(alpha = 0.05, shape = 1) + 
  geom_violin(alpha = 0.8, draw_quantiles = 1:9 / 10, scale = "width") +
  stat_summary(fun = function(x) mean(x, na.rm = TRUE), geom = "point") + 
  geom_hline(yintercept = 1000, linetype="dashed", color = "red") +
  ylab("Point estimate") +
  xlab("Model fitted")

p12

Summary statistics after excluding outliers:

results_data_frame |>
  subset(!is.na(point) & (data_generation == "Hurdleztnegbin")) |>
  subset(point < 5000) |>
  group_by(data_fitted) |>
  summarise(bias = mean(point - 1000),
            rel_bias = mean(point - 1000) / 1000,
            MSE = mean((point - 1000)^2),
            MAE = mean(abs(point - 1000)),
            coverage_normal = mean((conf_int_normal_lower < 1000) & (conf_int_normal_upper > 1000)),
            coverage_log_normal = mean((conf_int_log_normal_lower < 1000) & (conf_int_log_normal_upper > 1000)))

Confidence intervals

Normal

Exact binomial tests for coverage of lognormal confindence intervals with \(H_0:p=0.95, H_1:p\neq0.95\):

dd <- results_data_frame |>
  subset(!is.na(point)) |>
  subset(point < 5000) |>
  mutate(covr_norm = (conf_int_normal_lower < 1000) & (conf_int_normal_upper > 1000),
         covr_log  = (conf_int_log_normal_lower < 1000) & (conf_int_log_normal_upper > 1000)) |>
  group_by(data_generation, data_fitted) |>
  summarise(n = n(),
            mean = mean(covr_norm, na.rm = TRUE))
## `summarise()` has grouped output by 'data_generation'. You can override using
## the `.groups` argument.
dd <- cbind(dd, p_value = NA, lower = NA, upper = NA, signif = NA)

for (x in 1:NROW(dd)) {
  jj <- binom.test(x = as.numeric(dd[x, 4]) * as.integer(dd[x, 3]), n = as.integer(dd[x, 3]), p = .95)
  # this jj object has some very weird interactions with the rest of R ecosystem
  dd[x, 5] <- jj$p.value |> as.numeric()
  dd[x, 6] <- jj[[4]][1]
  dd[x, 7] <- jj[[4]][2]
  dd[x, 8] <- ifelse(dd[x, 5] < .001, "***", ifelse(dd[x, 5] < .01, "**", ifelse(dd[x, 5] < .05, "*", ifelse(dd[x, 5] < .1, ".", ""))))
}

dd |> 
  mutate(p_value = round(p_value, digits = 4),
         lower   = round(lower, digits = 4),
         upper   = round(upper, digits = 4)) |>
  print(n = NROW(dd))
## # A tibble: 80 × 8
## # Groups:   data_generation [12]
##    data_generation data_fitted     n   mean p_value  lower  upper signif
##    <chr>           <chr>       <int>  <dbl>   <dbl>  <dbl>  <dbl> <chr> 
##  1 Hurdleztgeom    correct       500 0.938   0.217  0.913  0.958  ""    
##  2 Hurdleztgeom    negbin        494 0.927   0.0292 0.900  0.948  "*"   
##  3 Hurdleztgeom    oizt          500 0       0      0      0.0074 "***" 
##  4 Hurdleztgeom    poisson       490 0       0      0      0.0075 "***" 
##  5 Hurdleztgeom    wrong_link    500 0.938   0.217  0.913  0.958  ""    
##  6 Hurdleztgeom    ztHurdle      500 0.654   0      0.610  0.696  "***" 
##  7 Hurdleztgeom    ztoi          500 0       0      0      0.0074 "***" 
##  8 Hurdleztnegbin  correct       500 0.914   0.0006 0.886  0.937  "***" 
##  9 Hurdleztnegbin  geom          500 0.004   0      0.0005 0.0144 "***" 
## 10 Hurdleztnegbin  oizt          500 0.864   0      0.831  0.893  "***" 
## 11 Hurdleztnegbin  poisson       500 0       0      0      0.0074 "***" 
## 12 Hurdleztnegbin  wrong_link    500 0.918   0.002  0.890  0.940  "**"  
## 13 Hurdleztnegbin  ztHurdle      492 0.945   0.604  0.921  0.964  ""    
## 14 Hurdleztnegbin  ztoi          497 0.913   0.0006 0.885  0.937  "***" 
## 15 Hurdleztpoisson correct       499 0.964   0.181  0.944  0.978  ""    
## 16 Hurdleztpoisson geometric     500 0       0      0      0.0074 "***" 
## 17 Hurdleztpoisson oizt          497 0       0      0      0.0074 "***" 
## 18 Hurdleztpoisson wrong_link    499 0.962   0.258  0.941  0.977  ""    
## 19 Hurdleztpoisson ztHurdle      499 0.687   0      0.645  0.728  "***" 
## 20 Hurdleztpoisson ztoi          497 0       0      0      0.0074 "***" 
## 21 oiztgeom        Hurdlezt      500 0.946   0.681  0.922  0.964  ""    
## 22 oiztgeom        correct       500 0.91    0.0002 0.881  0.934  "***" 
## 23 oiztgeom        negbin        494 0.850   0      0.816  0.880  "***" 
## 24 oiztgeom        poisson       491 0       0      0      0.0075 "***" 
## 25 oiztgeom        wrong_link    500 0.9     0      0.870  0.925  "***" 
## 26 oiztgeom        ztHurdle      500 0.004   0      0.0005 0.0144 "***" 
## 27 oiztgeom        ztoi          500 0.702   0      0.660  0.742  "***" 
## 28 oiztnegbin      Hurdlezt      500 0.99    0      0.977  0.997  "***" 
## 29 oiztnegbin      correct       500 0.986   0      0.971  0.994  "***" 
## 30 oiztnegbin      geom          500 0       0      0      0.0074 "***" 
## 31 oiztnegbin      poisson       499 0       0      0      0.0074 "***" 
## 32 oiztnegbin      wrong_link    500 0.988   0      0.974  0.996  "***" 
## 33 oiztnegbin      ztHurdle      491 0.0387  0      0.0235 0.0598 "***" 
## 34 oiztnegbin      ztoi          494 0.229   0      0.192  0.268  "***" 
## 35 oiztpoisson     Hurdlezt      499 0.754   0      0.713  0.791  "***" 
## 36 oiztpoisson     correct       495 0.931   0.0627 0.905  0.952  "."   
## 37 oiztpoisson     geometric     500 0       0      0      0.0074 "***" 
## 38 oiztpoisson     wrong_link    493 0.923   0.0093 0.896  0.945  "**"  
## 39 oiztpoisson     ztHurdle      499 0       0      0      0.0074 "***" 
## 40 oiztpoisson     ztoi          498 0.807   0      0.770  0.841  "***" 
## 41 ztHurdlegeom    Hurdlezt      500 0.86    0      0.826  0.889  "***" 
## 42 ztHurdlegeom    correct       500 0.96    0.355  0.939  0.975  ""    
## 43 ztHurdlegeom    negbin        486 0.932   0.0761 0.906  0.953  "."   
## 44 ztHurdlegeom    oizt          500 0       0      0      0.0074 "***" 
## 45 ztHurdlegeom    poisson       500 0       0      0      0.0074 "***" 
## 46 ztHurdlegeom    wrong_link    495 0.962   0.301  0.941  0.977  ""    
## 47 ztHurdlegeom    ztoi          500 0       0      0      0.0074 "***" 
## 48 ztHurdlenegbin  Hurdlezt      500 0.576   0      0.531  0.620  "***" 
## 49 ztHurdlenegbin  correct       495 0.941   0.354  0.917  0.960  ""    
## 50 ztHurdlenegbin  geom          500 0       0      0      0.0074 "***" 
## 51 ztHurdlenegbin  oizt          500 0.012   0      0.0044 0.0259 "***" 
## 52 ztHurdlenegbin  poisson       500 0       0      0      0.0074 "***" 
## 53 ztHurdlenegbin  wrong_link    497 0.942   0.409  0.917  0.961  ""    
## 54 ztHurdlenegbin  ztoi          499 0.0160  0      0.0069 0.0313 "***" 
## 55 ztHurdlepoisson Hurdlezt      500 0.8     0      0.762  0.834  "***" 
## 56 ztHurdlepoisson correct       500 0.946   0.681  0.922  0.964  ""    
## 57 ztHurdlepoisson geometric     500 0       0      0      0.0074 "***" 
## 58 ztHurdlepoisson oizt          499 0       0      0      0.0074 "***" 
## 59 ztHurdlepoisson wrong_link    500 0.946   0.681  0.922  0.964  ""    
## 60 ztHurdlepoisson ztoi          498 0       0      0      0.0074 "***" 
## 61 ztoigeom        Hurdlezt      500 0.774   0      0.735  0.810  "***" 
## 62 ztoigeom        correct       500 0.948   0.837  0.925  0.966  ""    
## 63 ztoigeom        negbin        440 0.868   0      0.833  0.898  "***" 
## 64 ztoigeom        oizt          500 0.83    0      0.794  0.862  "***" 
## 65 ztoigeom        poisson       497 0       0      0      0.0074 "***" 
## 66 ztoigeom        wrong_link    500 0.95    1      0.927  0.967  ""    
## 67 ztoigeom        ztHurdle      500 0.13    0      0.102  0.163  "***" 
## 68 ztoinegbin      Hurdlezt      500 0.55    0      0.505  0.594  "***" 
## 69 ztoinegbin      correct       493 0.921   0.0051 0.893  0.943  "**"  
## 70 ztoinegbin      geom          500 0       0      0      0.0074 "***" 
## 71 ztoinegbin      oizt          500 0.58    0      0.535  0.624  "***" 
## 72 ztoinegbin      poisson       500 0       0      0      0.0074 "***" 
## 73 ztoinegbin      wrong_link    494 0.925   0.0169 0.898  0.947  "*"   
## 74 ztoinegbin      ztHurdle      491 0.986   0      0.971  0.994  "***" 
## 75 ztoipoisson     Hurdlezt      499 0.465   0      0.420  0.510  "***" 
## 76 ztoipoisson     correct       496 0.938   0.215  0.912  0.957  ""    
## 77 ztoipoisson     geometric     500 0       0      0      0.0074 "***" 
## 78 ztoipoisson     oizt          490 0.829   0      0.792  0.861  "***" 
## 79 ztoipoisson     wrong_link    497 0.940   0.302  0.915  0.959  ""    
## 80 ztoipoisson     ztHurdle      499 0.0180  0      0.0083 0.034  "***"
## show only those that have high p value or have better coverage
dd |>
  filter(p_value > .05 | mean > .95) |>
  mutate(p_value = round(p_value, digits = 4),
         lower   = round(lower, digits = 4),
         upper   = round(upper, digits = 4)) |>
  print(n = NROW(dd))
## # A tibble: 22 × 8
## # Groups:   data_generation [12]
##    data_generation data_fitted     n  mean p_value lower upper signif
##    <chr>           <chr>       <int> <dbl>   <dbl> <dbl> <dbl> <chr> 
##  1 Hurdleztgeom    correct       500 0.938  0.217  0.913 0.958 ""    
##  2 Hurdleztgeom    wrong_link    500 0.938  0.217  0.913 0.958 ""    
##  3 Hurdleztnegbin  ztHurdle      492 0.945  0.604  0.921 0.964 ""    
##  4 Hurdleztpoisson correct       499 0.964  0.181  0.944 0.978 ""    
##  5 Hurdleztpoisson wrong_link    499 0.962  0.258  0.941 0.977 ""    
##  6 oiztgeom        Hurdlezt      500 0.946  0.681  0.922 0.964 ""    
##  7 oiztnegbin      Hurdlezt      500 0.99   0      0.977 0.997 "***" 
##  8 oiztnegbin      correct       500 0.986  0      0.971 0.994 "***" 
##  9 oiztnegbin      wrong_link    500 0.988  0      0.974 0.996 "***" 
## 10 oiztpoisson     correct       495 0.931  0.0627 0.905 0.952 "."   
## 11 ztHurdlegeom    correct       500 0.96   0.355  0.939 0.975 ""    
## 12 ztHurdlegeom    negbin        486 0.932  0.0761 0.906 0.953 "."   
## 13 ztHurdlegeom    wrong_link    495 0.962  0.301  0.941 0.977 ""    
## 14 ztHurdlenegbin  correct       495 0.941  0.354  0.917 0.960 ""    
## 15 ztHurdlenegbin  wrong_link    497 0.942  0.409  0.917 0.961 ""    
## 16 ztHurdlepoisson correct       500 0.946  0.681  0.922 0.964 ""    
## 17 ztHurdlepoisson wrong_link    500 0.946  0.681  0.922 0.964 ""    
## 18 ztoigeom        correct       500 0.948  0.837  0.925 0.966 ""    
## 19 ztoigeom        wrong_link    500 0.95   1      0.927 0.967 ""    
## 20 ztoinegbin      ztHurdle      491 0.986  0      0.971 0.994 "***" 
## 21 ztoipoisson     correct       496 0.938  0.215  0.912 0.957 ""    
## 22 ztoipoisson     wrong_link    497 0.940  0.302  0.915 0.959 ""

Visual results with confidence intervals:

qq1 <- dd |>
  ggplot(aes(x = data_fitted)) +
  facet_wrap(~ data_generation, scales = c("free_x"), ncol = 3) +
  geom_point(aes(y = mean), colour = "navy", cex = 2) +
  geom_errorbar(aes(ymin = lower, ymax = upper), colour = "navy") +
  ggtitle("Empirical coverage of studentized confidence intervals by true distribution of counts") +
  xlab("Fitted distribution") +
  ylab("Coverage")

qq1

Average sizes of confidence intervals:

qq2 <- results_data_frame |>
  subset(!is.na(point)) |>
  subset(point < 5000) |>
  group_by(data_generation, data_fitted) |>
  summarise(len = mean(conf_int_normal_upper - conf_int_normal_lower, na.rm = TRUE)) |>
  ggplot(aes(x = data_fitted, weight = len)) +
  geom_bar() +
  facet_wrap(~ data_generation, scales = c("free_x"), ncol = 3) +
  ylab("Average length") +
  xlab("Fitted distribution") +
  ggtitle("Empirical size of studentized confidence intervals by true distribution of counts")
## `summarise()` has grouped output by 'data_generation'. You can override using
## the `.groups` argument.
qq2

Logormal

Exact binomial tests for coverage of normal confindence intervals with \(H_0:p=0.95, H_1:p\neq0.95\):

dd <- results_data_frame |>
  subset(!is.na(point)) |>
  subset(point < 5000) |>
  mutate(covr_norm = (conf_int_normal_lower < 1000) & (conf_int_normal_upper > 1000),
         covr_log  = (conf_int_log_normal_lower < 1000) & (conf_int_log_normal_upper > 1000)) |>
  group_by(data_generation, data_fitted) |>
  summarise(n = n(),
            mean = mean(covr_log, na.rm = TRUE))
## `summarise()` has grouped output by 'data_generation'. You can override using
## the `.groups` argument.
dd <- cbind(dd, p_value = NA, lower = NA, upper = NA, signif = NA)

for (x in 1:NROW(dd)) {
  jj <- binom.test(x = as.numeric(dd[x, 4]) * as.integer(dd[x, 3]), n = as.integer(dd[x, 3]), p = .95)
  # this jj object has some very weird interactions with the rest of R ecosystem
  dd[x, 5] <- jj$p.value |> as.numeric()
  dd[x, 6] <- jj[[4]][1]
  dd[x, 7] <- jj[[4]][2]
  dd[x, 8] <- ifelse(dd[x, 5] < .001, "***", ifelse(dd[x, 5] < .01, "**", ifelse(dd[x, 5] < .05, "*", ifelse(dd[x, 5] < .1, ".", ""))))
}

dd |> 
  mutate(p_value = round(p_value, digits = 4),
         lower   = round(lower, digits = 4),
         upper   = round(upper, digits = 4)) |>
  print(n = NROW(dd))
## # A tibble: 80 × 8
## # Groups:   data_generation [12]
##    data_generation data_fitted     n    mean p_value  lower  upper signif
##    <chr>           <chr>       <int>   <dbl>   <dbl>  <dbl>  <dbl> <chr> 
##  1 Hurdleztgeom    correct       500 0.944    0.537  0.920  0.962  ""    
##  2 Hurdleztgeom    negbin        494 0.960    0.408  0.938  0.975  ""    
##  3 Hurdleztgeom    oizt          500 0        0      0      0.0074 "***" 
##  4 Hurdleztgeom    poisson       490 0        0      0      0.0075 "***" 
##  5 Hurdleztgeom    wrong_link    500 0.944    0.537  0.920  0.962  ""    
##  6 Hurdleztgeom    ztHurdle      500 0.542    0      0.497  0.586  "***" 
##  7 Hurdleztgeom    ztoi          500 0        0      0      0.0074 "***" 
##  8 Hurdleztnegbin  correct       500 0.948    0.837  0.925  0.966  ""    
##  9 Hurdleztnegbin  geom          500 0.002    0      0.0001 0.0111 "***" 
## 10 Hurdleztnegbin  oizt          500 0.902    0      0.872  0.927  "***" 
## 11 Hurdleztnegbin  poisson       500 0        0      0      0.0074 "***" 
## 12 Hurdleztnegbin  wrong_link    500 0.948    0.837  0.925  0.966  ""    
## 13 Hurdleztnegbin  ztHurdle      492 0.549    0      0.504  0.593  "***" 
## 14 Hurdleztnegbin  ztoi          497 0.901    0      0.872  0.926  "***" 
## 15 Hurdleztpoisson correct       499 0.958    0.473  0.936  0.974  ""    
## 16 Hurdleztpoisson geometric     500 0        0      0      0.0074 "***" 
## 17 Hurdleztpoisson oizt          497 0        0      0      0.0074 "***" 
## 18 Hurdleztpoisson wrong_link    499 0.954    0.758  0.932  0.971  ""    
## 19 Hurdleztpoisson ztHurdle      499 0.553    0      0.508  0.597  "***" 
## 20 Hurdleztpoisson ztoi          497 0        0      0      0.0074 "***" 
## 21 oiztgeom        Hurdlezt      500 0.952    0.918  0.929  0.969  ""    
## 22 oiztgeom        correct       500 0.892    0      0.861  0.918  "***" 
## 23 oiztgeom        negbin        494 0.844    0      0.809  0.875  "***" 
## 24 oiztgeom        poisson       491 0        0      0      0.0075 "***" 
## 25 oiztgeom        wrong_link    500 0.89     0      0.859  0.916  "***" 
## 26 oiztgeom        ztHurdle      500 0.004    0      0.0005 0.0144 "***" 
## 27 oiztgeom        ztoi          500 0.608    0      0.564  0.651  "***" 
## 28 oiztnegbin      Hurdlezt      500 0.844    0      0.809  0.875  "***" 
## 29 oiztnegbin      correct       500 0.832    0      0.796  0.864  "***" 
## 30 oiztnegbin      geom          500 0        0      0      0.0074 "***" 
## 31 oiztnegbin      poisson       499 0        0      0      0.0074 "***" 
## 32 oiztnegbin      wrong_link    500 0.832    0      0.796  0.864  "***" 
## 33 oiztnegbin      ztHurdle      491 0.00407  0      0.0005 0.0146 "***" 
## 34 oiztnegbin      ztoi          494 0.0466   0      0.0297 0.069  "***" 
## 35 oiztpoisson     Hurdlezt      499 0.836    0      0.800  0.867  "***" 
## 36 oiztpoisson     correct       495 0.929    0.0389 0.903  0.950  "*"   
## 37 oiztpoisson     geometric     500 0        0      0      0.0074 "***" 
## 38 oiztpoisson     wrong_link    493 0.927    0.0229 0.900  0.948  "*"   
## 39 oiztpoisson     ztHurdle      499 0        0      0      0.0074 "***" 
## 40 oiztpoisson     ztoi          498 0.717    0      0.675  0.756  "***" 
## 41 ztHurdlegeom    Hurdlezt      500 0.892    0      0.861  0.918  "***" 
## 42 ztHurdlegeom    correct       500 0.956    0.608  0.934  0.972  ""    
## 43 ztHurdlegeom    negbin        486 0.938    0.251  0.913  0.958  ""    
## 44 ztHurdlegeom    oizt          500 0        0      0      0.0074 "***" 
## 45 ztHurdlegeom    poisson       500 0        0      0      0.0074 "***" 
## 46 ztHurdlegeom    wrong_link    495 0.958    0.535  0.936  0.974  ""    
## 47 ztHurdlegeom    ztoi          500 0        0      0      0.0074 "***" 
## 48 ztHurdlenegbin  Hurdlezt      500 0.752    0      0.712  0.789  "***" 
## 49 ztHurdlenegbin  correct       495 0.966    0.121  0.946  0.980  ""    
## 50 ztHurdlenegbin  geom          500 0        0      0      0.0074 "***" 
## 51 ztHurdlenegbin  oizt          500 0.04     0      0.0246 0.0611 "***" 
## 52 ztHurdlenegbin  poisson       500 0        0      0      0.0074 "***" 
## 53 ztHurdlenegbin  wrong_link    497 0.966    0.122  0.946  0.98   ""    
## 54 ztHurdlenegbin  ztoi          499 0.0341   0      0.02   0.054  "***" 
## 55 ztHurdlepoisson Hurdlezt      500 0.87     0      0.837  0.898  "***" 
## 56 ztHurdlepoisson correct       500 0.952    0.918  0.929  0.969  ""    
## 57 ztHurdlepoisson geometric     500 0        0      0      0.0074 "***" 
## 58 ztHurdlepoisson oizt          499 0        0      0      0.0074 "***" 
## 59 ztHurdlepoisson wrong_link    500 0.952    0.918  0.929  0.969  ""    
## 60 ztHurdlepoisson ztoi          498 0        0      0      0.0074 "***" 
## 61 ztoigeom        Hurdlezt      500 0.822    0      0.786  0.854  "***" 
## 62 ztoigeom        correct       500 0.936    0.150  0.911  0.956  ""    
## 63 ztoigeom        negbin        440 0.861    0      0.826  0.892  "***" 
## 64 ztoigeom        oizt          500 0.856    0      0.822  0.886  "***" 
## 65 ztoigeom        poisson       497 0        0      0      0.0074 "***" 
## 66 ztoigeom        wrong_link    500 0.934    0.101  0.909  0.954  ""    
## 67 ztoigeom        ztHurdle      500 0.068    0      0.0475 0.0937 "***" 
## 68 ztoinegbin      Hurdlezt      500 0.682    0      0.639  0.723  "***" 
## 69 ztoinegbin      correct       493 0.945    0.605  0.921  0.964  ""    
## 70 ztoinegbin      geom          500 0        0      0      0.0074 "***" 
## 71 ztoinegbin      oizt          500 0.702    0      0.660  0.742  "***" 
## 72 ztoinegbin      poisson       500 0        0      0      0.0074 "***" 
## 73 ztoinegbin      wrong_link    494 0.951    1      0.929  0.969  ""    
## 74 ztoinegbin      ztHurdle      491 0.768    0      0.728  0.804  "***" 
## 75 ztoipoisson     Hurdlezt      499 0.599    0      0.555  0.642  "***" 
## 76 ztoipoisson     correct       496 0.938    0.215  0.912  0.957  ""    
## 77 ztoipoisson     geometric     500 0        0      0      0.0074 "***" 
## 78 ztoipoisson     oizt          490 0.876    0      0.843  0.903  "***" 
## 79 ztoipoisson     wrong_link    497 0.940    0.302  0.915  0.959  ""    
## 80 ztoipoisson     ztHurdle      499 0.00802  0      0.0022 0.0204 "***"
## show only those that have high p value or have better coverage
dd |>
  filter(p_value > .05 | mean > .95) |>
  mutate(p_value = round(p_value, digits = 4),
         lower   = round(lower, digits = 4),
         upper   = round(upper, digits = 4)) |>
  print(n = NROW(dd))
## # A tibble: 21 × 8
## # Groups:   data_generation [10]
##    data_generation data_fitted     n  mean p_value lower upper signif
##    <chr>           <chr>       <int> <dbl>   <dbl> <dbl> <dbl> <chr> 
##  1 Hurdleztgeom    correct       500 0.944   0.537 0.920 0.962 ""    
##  2 Hurdleztgeom    negbin        494 0.960   0.408 0.938 0.975 ""    
##  3 Hurdleztgeom    wrong_link    500 0.944   0.537 0.920 0.962 ""    
##  4 Hurdleztnegbin  correct       500 0.948   0.837 0.925 0.966 ""    
##  5 Hurdleztnegbin  wrong_link    500 0.948   0.837 0.925 0.966 ""    
##  6 Hurdleztpoisson correct       499 0.958   0.473 0.936 0.974 ""    
##  7 Hurdleztpoisson wrong_link    499 0.954   0.758 0.932 0.971 ""    
##  8 oiztgeom        Hurdlezt      500 0.952   0.918 0.929 0.969 ""    
##  9 ztHurdlegeom    correct       500 0.956   0.608 0.934 0.972 ""    
## 10 ztHurdlegeom    negbin        486 0.938   0.251 0.913 0.958 ""    
## 11 ztHurdlegeom    wrong_link    495 0.958   0.535 0.936 0.974 ""    
## 12 ztHurdlenegbin  correct       495 0.966   0.121 0.946 0.980 ""    
## 13 ztHurdlenegbin  wrong_link    497 0.966   0.122 0.946 0.98  ""    
## 14 ztHurdlepoisson correct       500 0.952   0.918 0.929 0.969 ""    
## 15 ztHurdlepoisson wrong_link    500 0.952   0.918 0.929 0.969 ""    
## 16 ztoigeom        correct       500 0.936   0.150 0.911 0.956 ""    
## 17 ztoigeom        wrong_link    500 0.934   0.101 0.909 0.954 ""    
## 18 ztoinegbin      correct       493 0.945   0.605 0.921 0.964 ""    
## 19 ztoinegbin      wrong_link    494 0.951   1     0.929 0.969 ""    
## 20 ztoipoisson     correct       496 0.938   0.215 0.912 0.957 ""    
## 21 ztoipoisson     wrong_link    497 0.940   0.302 0.915 0.959 ""

Visual results with confidence intervals:

qq3 <- dd |>
  ggplot(aes(x = data_fitted)) +
  facet_wrap(~ data_generation, scales = c("free_x"), ncol = 3) +
  geom_point(aes(y = mean), colour = "navy", cex = 2) +
  geom_errorbar(aes(ymin = lower, ymax = upper), colour = "navy") +
  ggtitle("Empirical coverage of log normal confidence intervals by true distribution of counts") +
  xlab("Fitted distribution") +
  ylab("Coverage")

qq3

Average sizes of confidence intervals:

qq4 <- results_data_frame |>
  subset(!is.na(point)) |>
  subset(point < 5000) |>
  group_by(data_generation, data_fitted) |>
  summarise(len = mean(conf_int_log_normal_upper - conf_int_log_normal_lower, na.rm = TRUE)) |>
  ggplot(aes(x = data_fitted, weight = len)) +
  geom_bar() +
  facet_wrap(~ data_generation, scales = "free", ncol = 3) +
  ylab("Average length") +
  xlab("Fitted distribution") +
  ggtitle("Empirical size of log normal confidence intervals by true distribution of counts")
## `summarise()` has grouped output by 'data_generation'. You can override using
## the `.groups` argument.
qq4